import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
import os

# Switch to the directory that holds the input CSV so the relative file name
# below resolves.
os.chdir("C:/Users/agusv/Desktop/Estudio/Tesis/Csv")

# Load the multivariate dataset and index it by the parsed 'Date' column.
data = pd.read_csv("multivariate_lng.csv", parse_dates=['Date']).set_index('Date')

# Full-history series: the target (LNG price) and the three explanatory
# variables that are later forecast individually.
lng_prices = data['LNG Price']
brent_series = data['Brent Spot Price']
inf_series = data['Inflation']
usd_eur_series = data['USD/EUR']

# The regression is fitted only on observations from 2017 onwards.
train_data = data.loc[data.index.year >= 2017]
y_training = train_data['LNG Price']
X_training = train_data[['Brent Spot Price', 'Inflation', 'USD/EUR']]

"""
from pmdarima import auto_arima
def fit_auto_arimax(series, exog=None, seasonal=True, m=12):
    auto_model = auto_arima(
        series,
        exogenous=exog,
        seasonal=seasonal,
        m=m,
        trace=True,
        error_action="ignore", 
        suppress_warnings=True,
        stepwise=True,
        n_jobs=-1
    )
    return auto_model

# Fit auto ARIMAX models for each variable
brent_model = fit_auto_arimax(brent_series, seasonal=True, m=12)
ipc_model = fit_auto_arimax(inf_series, seasonal=True, m=12)
usd_eur_model = fit_auto_arimax(usd_eur_series, seasonal=True, m=12)
"""

# One univariate seasonal ARIMA model per explanatory variable, each fitted on
# its full history with a 12-period season. The orders are hard-coded here.
# NOTE(review): presumably they come from the disabled auto_arima search kept
# in the string literal above -- confirm.
sarimax_brent = SARIMAX(
    endog=brent_series,
    order=(0, 1, 1),
    seasonal_order=(1, 1, 2, 12),
).fit(disp=False)

sarimax_ipc = SARIMAX(
    endog=inf_series,
    order=(1, 1, 1),
    seasonal_order=(0, 0, 1, 12),
).fit(disp=False)

sarimax_usd_eur = SARIMAX(
    endog=usd_eur_series,
    order=(1, 1, 0),
    seasonal_order=(0, 0, 0, 12),
).fit(disp=False)

# Forecast horizon: one step per month, from the month after the last
# observation through December of the target year.
#
# Counting only whole years ((target_year - last_year) * 12) stops at the
# anniversary of the last observation, so whenever the data does not end in
# December the final calendar year is only partially forecast and any yearly
# average over it is computed on a truncated year. Adding the months left in
# the last observed year fixes that; for data ending in December the result
# is unchanged.
forecast_end_year = 2030
last_observation = data.index[-1]
future_steps = (
    (forecast_end_year - last_observation.year) * 12
    + (12 - last_observation.month)
)
if future_steps <= 0:
    # Fail early with a clear message instead of an obscure error from
    # date_range / get_forecast further down.
    raise ValueError(
        "The data already reaches the end of the forecast horizon "
        "(December %d); there is nothing to forecast." % forecast_end_year
    )

# 'MS' = month-start frequency: one timestamp per forecast month.
future_index = pd.date_range(
    start=last_observation + pd.DateOffset(months=1),
    periods=future_steps,
    freq='MS',
)

# Out-of-sample point forecasts of each explanatory variable over the
# forecast horizon.
brent_forecast = sarimax_brent.get_forecast(steps=future_steps).predicted_mean
ipc_forecast = sarimax_ipc.get_forecast(steps=future_steps).predicted_mean
usd_eur_forecast = sarimax_usd_eur.get_forecast(steps=future_steps).predicted_mean

# Future design matrix for the linear model.
#
# * Column names and order must match X_training exactly -- the inflation
#   column is 'Inflation' (it was previously mislabelled 'Variation');
#   scikit-learn validates feature names against those seen during fit.
# * The forecasts are passed as plain arrays so they are placed positionally
#   on future_index. Passing the Series themselves together with index=...
#   re-aligns them by label and silently yields NaN rows whenever the
#   forecast index is not identical to future_index.
future_vars = pd.DataFrame({
    'Brent Spot Price': np.asarray(brent_forecast),
    'Inflation': np.asarray(ipc_forecast),
    'USD/EUR': np.asarray(usd_eur_forecast),
}, index=future_index)

# Ordinary least-squares regression of the LNG price on the three explanatory
# variables, fitted on the training window.
linear_model = LinearRegression()
linear_model.fit(X_training, y_training)

# Regression-only forecast, driven by the forecast explanatory variables.
linear_predictions = linear_model.predict(future_vars)

# In-sample residuals (observed minus fitted); these are modelled separately
# further down.
in_sample_fit = linear_model.predict(X_training)
residuals = y_training - in_sample_fit
"""
from pmdarima import auto_arima
def fit_auto_arimax(series, exog=None, seasonal=True, m=12):
    auto_model = auto_arima(
        series,
        exogenous=exog,
        seasonal=seasonal,
        m=m,
        trace=True,
        error_action="ignore", 
        suppress_warnings=True,
        stepwise=True,
        n_jobs=-1
    )
    return auto_model
ava = fit_auto_arimax(residuals, seasonal=True, m=12)
"""
# Model the in-sample regression residuals as a seasonal ARIMA process
# (12-period season) with hard-coded orders.
sarimax_residuals = SARIMAX(
    endog=residuals,
    order=(1, 0, 3),
    seasonal_order=(2, 1, 2, 12),
).fit(disp=False)

# Residual forecast over the same dates as the explanatory-variable forecasts.
residuals_forecast = sarimax_residuals.predict(
    start=future_index[0],
    end=future_index[-1],
)

# Combined forecast: regression prediction plus forecast residual.
final_predictions = linear_predictions + residuals_forecast

# Band of +/- one standard deviation of the in-sample residuals around the
# combined forecast (constant width over the horizon).
std_dev_residuals = np.std(residuals)
upper_bound = final_predictions + std_dev_residuals
lower_bound = final_predictions - std_dev_residuals

# Plot the observed LNG price history together with the combined forecast and
# its +/- one standard deviation band.
fig, ax = plt.subplots(figsize=(14, 6))

ax.plot(data['LNG Price'], color="green", label="Real LNG Price")
ax.plot(
    future_index,
    final_predictions,
    color="red",
    linestyle="--",
    label="Linear + SARIMAX Forecast",
)
ax.fill_between(future_index, lower_bound, upper_bound, color='orange', alpha=0.3)

ax.set_xlabel("Date")
ax.set_ylabel("LNG Price [€/kg]")
ax.set_title("LNG Price Prediction (Linear + SARIMAX)")
ax.legend()

plt.show()

# Collect the forecast and its band in one table, keeping the forecast dates
# as an explicit 'Date' column.
predictions_df = pd.DataFrame(
    {
        'Date': future_index,
        'Predicted LNG Price': final_predictions,
        'Upper Bound': upper_bound,
        'Lower Bound': lower_bound,
    }
)

# Average predicted LNG price over the forecast months that fall in 2030.
mean_LNG_2030 = predictions_df.loc[
    predictions_df["Date"].dt.year == 2030,
    'Predicted LNG Price',
].mean()
